rm(list = ls())

library(tidyverse)
library(pbapply)
library(stargazer)
library(rdrobust)

## 

covs <- c('turnout_party_2009_btw', 'soz_vers_beschaeftigte_share',
          'pop_density_km2', 'migration_out_share')

## Define extra covs

covs_extra <- c('left_total_party_09',
                'right_total_party_09',
                'left_total_party_diff',
                'right_total_party_diff')

## Federal elections

bt <- readRDS("data/data_federal.rds") %>%
  filter(!is.na(treated)) %>% 
  filter(year > 2012)

## First differences

diff_df_bt <- pblapply("agg_left_party", function(o) {
  out <- bt %>%
    filter(year > 2012) %>%
    pivot_wider(values_from = o, names_from = 'year', id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2017  - o2013) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(bt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs), state_id) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1) %>% 
  filter(!state_id == '08')

## Def kernel function

kern <- function(x, bw) {
  
  z <- x/bw
  k <- ifelse(abs(z) < 1, 1-abs(z), 0)
  
}

## Add extra controls ##

extra_controls <- "data/covars_political.rds" %>% 
  read_rds()

diff_df_bt <- diff_df_bt %>% 
  left_join(extra_controls)

## Estimation
## First, get optimal BW via rdrobust

out <- rdrobust(y = diff_df_bt %>% pull(agg_left_party), 
                x = diff_df_bt$runvar, c = 0,
                covs = diff_df_bt[, covs])

bw <- out$bws[1]

## Define weights (akin to RD)

df_bt <- diff_df_bt %>% 
  filter(between(runvar, -bw, bw)) %>% 
  mutate(treat = runvar>0) %>% 
  mutate(w = kern(runvar, bw))

## Manual RDD

f <- "agg_left_party ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  as.formula()
f_covs <- "agg_left_party ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  paste0('+', paste0(covs_extra, collapse = '+')) %>% 
  as.formula()

m1_w <- lm(f,  data = df_bt, weights = df_bt$w)
m1_w_covs <- lm(f_covs,  data = df_bt, weights = df_bt$w)

## Get state election data

lt <- readRDS('data/data_state.rds') %>%
  filter(!is.na(treated)) %>%
  mutate(year = lubridate::year(date)) %>%
  mutate(state_id = substr(ags, 1, 2))

## Get info on states

lt <- read_rds("data/states_census.rds") %>%
  dplyr::select(applies_census, state_id, census_first_year) %>% 
  left_join(lt, .) %>%
  mutate(time_rel = year - census_first_year)

## Generate period rel. to treatment

periods_by_state <- lt %>%
  group_by(state_id) %>%
  summarise(l = list(unique(time_rel))) %>%
  unnest()
periods_by_state <- periods_by_state %>%
  group_by(state_id) %>%
  summarise(l_r = list(rank(l))) %>%
  unnest() %>% dplyr::select(-state_id) %>% 
  bind_cols(periods_by_state, .)
periods_by_state <- periods_by_state %>% 
  group_by(state_id) %>%
  mutate(l_r = l_r - max(l_r)) %>%
  filter(!is.na(l)) %>%
  rename(time_rel_period = l_r,
         time_rel = l)

## Merge this back to the main DF

lt <- lt %>%
  left_join(periods_by_state) %>%
  mutate(post = ifelse(time_rel_period > -1, 1, 0),
         treated_post = post * treated) %>% 
  filter(time_rel_period > -2)

## First difference

diff_df <- pblapply("agg_left", function(o) {
  out <- lt %>%
    filter(time_rel_period > -2) %>%
    mutate(time_rel_period = time_rel_period + 2)  %>% 
    pivot_wider(values_from = o, names_from = 'time_rel_period', 
                id_cols = 'ags',
                names_prefix = 'o') %>%
    mutate(diff = o2  - o1) %>%  dplyr::select(ags, diff) 
  ## Rename
  colnames(out)[2] <- o
  
  ## Return this
  out
}) %>%
  reduce(left_join) %>%
  left_join(lt %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs)) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1) %>%
  filter(!str_sub(ags, 1, 2) == '01') %>% ## Exclude SH
  filter(!(substr(ags, 1, 2) == '08')) ## Exclude BW

## Add additional covars

diff_df <- diff_df %>% 
  left_join(extra_controls)

## Use rdrobust to get the BW

out <- rdrobust(y = diff_df %>% pull(agg_left), 
                x = diff_df$runvar, c = 0,
                covs = diff_df[, covs])
bw <- out$bws[1]

## Weights

df <- diff_df %>% 
  filter(between(runvar, -bw, bw)) %>% 
  mutate(treat = runvar>0) %>% 
  mutate(w = kern(runvar, bw))

## Estimate

f <- "agg_left ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  as.formula()

f_covs <- "agg_left ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  paste0('+', paste0(covs_extra, collapse = '+')) %>% 
  as.formula()

m1_lt_w <- lm(f,
              data = df,
              weights = df$w)
m1_lt_w_covs <- lm(f_covs,
                   data = df,
                   weights = df$w)

## Municipal election data 

mu <- readRDS('data/data_municipal.rds') %>%
  filter(!is.na(treated)) %>%
  mutate(state_id = substr(ags, 1, 2)) %>% 
  filter(time_rel_period > -2)

## First differences

diff_df <- mu %>%
  mutate(time_rel_period = time_rel_period + 2)  %>% 
  pivot_wider(values_from = "agg_left", names_from = 'time_rel_period', 
              id_cols = 'ags',
              names_prefix = 'o') %>%
  mutate(diff = o2  - o1) %>%  dplyr::select(ags, diff) 

## Rename
colnames(diff_df)[2] <- "agg_left"

## Return this
diff_df <- diff_df %>%
  left_join(mu %>% dplyr::select(ags, pop_dec_09, applies_census,
                                 one_of(covs), state_id) %>%
              distinct(ags, .keep_all = T)) %>%
  mutate(runvar = (pop_dec_09 * -1) + 10000) %>%
  filter(applies_census == 1) %>% 
  left_join(extra_controls) %>% 
  filter(!state_id == "08")

## Use rdrobust to get optimal BW

out <- rdrobust(y = diff_df %>% pull(agg_left), 
                x = diff_df$runvar, c = 0,
                covs = diff_df[, covs])

bw <- out$bws[1]

## Weights

df <- diff_df %>% 
  filter(between(runvar, -bw, bw)) %>% 
  mutate(treat = runvar>0) %>% 
  mutate(w = kern(runvar, bw))

## Estimate

f <- "agg_left ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  as.formula()
f_covs <- "agg_left ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  paste0('+', paste0(covs_extra, collapse = '+')) %>% 
  as.formula()

## 

m1_muni_w <- lm(f, data = df, weights = df$w)
m1_muni_w_covs <- lm(f_covs, data = df, weights = df$w)

# Table A8: Effect on vote, parametric RDD ----

stargazer(list(m1_w, m1_lt_w, m1_muni_w), keep = c(2:7), 
          covariate.labels = c("Census shock", 
                               "Turnout (2009)",
                               "Employment / 1000 capita (2011)",
                               "Population density / km2 (2011)",
                               "Out-migration / 1000 capita (2011)"),
          keep.stat = c("n", "rsq", 'adj.rsq', "f"),
          style = 'ajps',
          digits = 2)

# Table A.9 : Controlling for pre-treatment differences ----

stargazer(list(m1_w_covs, m1_lt_w_covs, m1_muni_w_covs), 
          keep = c(2:11), 
          covariate.labels = c("Census shock", 
                               "Turnout (2009)",
                               "Employment / 1000 capita (2011)",
                               "Population density / km2 (2011)",
                               "Out-migration / 1000 capita (2011)",
                               "Left-wing vote share (2009)",
                               "Right-wing vote share (2009)",
                               "Change in left-wing vote share (2009-2013)",
                               "Change in right-wing vote share (2009-2013)"),
          keep.stat = c("n", "rsq", 'adj.rsq', "f"),
          style = 'ajps',
          digits = 2, type = 'latex')

## Regression w/ state interactions

## Def models

f_base <- "agg_left_party ~ runvar + treat + runvar*treat" %>% 
  paste0('+', paste0(covs, collapse = '+')) %>% 
  as.formula()
f_state <- "agg_left_party ~ runvar*state_id + treat*state_id + runvar*treat*state_id" %>% 
  paste0('+', paste0(covs, "*treat", collapse = '+')) %>% 
  as.formula()

## Base state: NRW

baselev <- "05"

df_bt <- df_bt %>% 
  mutate(state_id = relevel(factor(state_id), ref = baselev)) %>% 
  filter(complete.cases(turnout_party_2009_btw,
                        soz_vers_beschaeftigte_share,
                        pop_density_km2,
                        migration_out_share))

## Estimate

m1_bt <- lm(f_base, data = df_bt, weights = df_bt$w)
m2_bt <- lm(f_state, data = df_bt, weights = df_bt$w)

## Observations 

n_df <- df_bt %>% 
  count(state_id) %>% 
  dplyr::rename(term = state_id)

## Return

state_id_to_names <- function (state_id) 
{
  names <- recode(state_id, `01` = "Schleswig-Holstein", `02` = "Hamburg", 
                  `03` = "Niedersachsen", `04` = "Bremen", `05` = "North Rhine-Westphalia", 
                  `06` = "Hesse", `07` = "Rhineland-Palatinate", `08` = "Baden-Württemberg", 
                  `09` = "Bavaria", `10` = "Saarland", `11` = "Berlin", 
                  `12` = "Brandenburg", `13` = "Mecklenburg-Vorpommern", 
                  `14` = "Saxony", `15` = "Saxony-Anhalt", `16` = "Thuringia")
  return(names)
}

##

sid_list <- unique(df_bt$state_id)
sid_list_proper <- sid_list[-which(sid_list == baselev)] %>% 
  state_id_to_names() %>% as.character()
n_list <- n_df %>% 
  filter(!term == baselev) %>% 
  pull(n)
sid_list_with_n <- paste0(sid_list_proper, " (N=", n_list, ")")

## Labels

clabs <- c(sid_list_proper,
           "Census shock (ref.: NRW)",
           paste0("Census shock $\\times$ ",
                  sid_list_with_n),
           "Constant")

# Table A10: HTE by state ----

stargazer(list(m1_bt, m2_bt), 
          omit = "runvar|turnout|soz|density|migration",
          covariate.labels = clabs)


